import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import plotly.express as px
# Load the raw per-particle table; the path is relative to the notebook's
# working directory.
RAW_DATA_PATH = "../ME22_Concentration_Particles_Raw data.xlsx"
df = pd.read_excel(RAW_DATA_PATH)
# Notebook display of the loaded frame.
df
| Sample | Particle Id | Group | Score | PM80-1 (µg/m3) | Particle # conc. (#particle/m3) | CmNormPM10-PM2.5 (µg/m3) | Mass deposition (µg/cm2/s) | Number deposition (#particles/cm2/s) | MassMicrogram (µg) | ... | Th | Pa | U | Np | Pu | Am | Cm | Bk | Cf | Es | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BO22_Car_p1_sec. analysis | 16 | Biogenic_Organic | 0.961 | 0.048785 | 298.552352 | 0.042244 | 6.920258e-15 | 0.000042 | 0.000163 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | BO22_Car_p1_sec. analysis | 22 | Biogenic_Organic | 0.972 | 0.042139 | 400.144760 | 0.038623 | 4.459923e-15 | 0.000042 | 0.000105 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | BO22_Car_p1_sec. analysis | 130 | Biogenic_Organic | 0.963 | 0.030344 | 771.693922 | 0.028766 | 1.665273e-15 | 0.000042 | 0.000039 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | BO22_Car_p1_sec. analysis | 132 | Biogenic_Organic | 0.963 | 0.023648 | 1270.575606 | 0.019901 | 7.882287e-16 | 0.000042 | 0.000019 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | BO22_Car_p1_sec. analysis | 150 | Biogenic_Organic | 0.780 | 0.021368 | 1556.176408 | 0.015646 | 5.815202e-16 | 0.000042 | 0.000014 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7415 | ME22_TLY_P2 | 1446 | Tire wear | 0.973 | 0.035577 | 136.383237 | 0.023998 | 4.658701e-15 | 0.000032 | 0.000261 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7416 | ME22_TLY_P2 | 1500 | Tire wear | 0.989 | 0.024097 | 297.297857 | 0.022019 | 1.447500e-15 | 0.000032 | 0.000081 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7417 | ME22_TLY_P2 | 1614 | Tire wear | 0.384 | 0.029359 | 200.269512 | 0.024489 | 2.618087e-15 | 0.000032 | 0.000147 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7418 | ME22_TLY_P2 | 1662 | Tire wear | 0.964 | 0.017879 | 540.049822 | 0.016994 | 5.912288e-16 | 0.000032 | 0.000033 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7419 | ME22_TLY_P2 | 1701 | Tire wear | 0.801 | 0.016186 | 658.881086 | 0.015200 | 4.387284e-16 | 0.000032 | 0.000025 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
7420 rows × 144 columns
# Bar chart of how many particles were assigned to each group.
group_counts = df["Group"].value_counts()
group_counts.plot.bar()
<AxesSubplot:>
# Display the DataFrame again (notebook cell output).
df
| Sample | Particle Id | Group | Score | PM80-1 (µg/m3) | Particle # conc. (#particle/m3) | CmNormPM10-PM2.5 (µg/m3) | Mass deposition (µg/cm2/s) | Number deposition (#particles/cm2/s) | MassMicrogram (µg) | ... | Th | Pa | U | Np | Pu | Am | Cm | Bk | Cf | Es | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BO22_Car_p1_sec. analysis | 16 | Biogenic_Organic | 0.961 | 0.048785 | 298.552352 | 0.042244 | 6.920258e-15 | 0.000042 | 0.000163 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | BO22_Car_p1_sec. analysis | 22 | Biogenic_Organic | 0.972 | 0.042139 | 400.144760 | 0.038623 | 4.459923e-15 | 0.000042 | 0.000105 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | BO22_Car_p1_sec. analysis | 130 | Biogenic_Organic | 0.963 | 0.030344 | 771.693922 | 0.028766 | 1.665273e-15 | 0.000042 | 0.000039 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | BO22_Car_p1_sec. analysis | 132 | Biogenic_Organic | 0.963 | 0.023648 | 1270.575606 | 0.019901 | 7.882287e-16 | 0.000042 | 0.000019 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | BO22_Car_p1_sec. analysis | 150 | Biogenic_Organic | 0.780 | 0.021368 | 1556.176408 | 0.015646 | 5.815202e-16 | 0.000042 | 0.000014 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7415 | ME22_TLY_P2 | 1446 | Tire wear | 0.973 | 0.035577 | 136.383237 | 0.023998 | 4.658701e-15 | 0.000032 | 0.000261 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7416 | ME22_TLY_P2 | 1500 | Tire wear | 0.989 | 0.024097 | 297.297857 | 0.022019 | 1.447500e-15 | 0.000032 | 0.000081 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7417 | ME22_TLY_P2 | 1614 | Tire wear | 0.384 | 0.029359 | 200.269512 | 0.024489 | 2.618087e-15 | 0.000032 | 0.000147 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7418 | ME22_TLY_P2 | 1662 | Tire wear | 0.964 | 0.017879 | 540.049822 | 0.016994 | 5.912288e-16 | 0.000032 | 0.000033 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7419 | ME22_TLY_P2 | 1701 | Tire wear | 0.801 | 0.016186 | 658.881086 | 0.015200 | 4.387284e-16 | 0.000032 | 0.000025 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
7420 rows × 144 columns
Check a histogram for the score value
Chemical distributions of morphochemical groups
# Show every column when a DataFrame is displayed (global pandas option).
pd.set_option('display.max_columns', None)

# Rows whose group label is 'Mineral'.
mineral_df = df.loc[df["Group"] == 'Mineral']

# Columns used further down as the chemical feature set.
chemic_features = ["Si", "C", "O", "P", "Cl", "Na", "Ca", "Al", "Fe"]

# Positional column slice starting at 'H' and stopping just before 'Bi'
# ('Bi' itself is excluded by the half-open slice).
columns_list = df.columns.tolist()
H_idx = columns_list.index('H')
Bi_idx = columns_list.index('Bi')
mineral_chemic = mineral_df.iloc[:, H_idx:Bi_idx]

# Keep only columns holding a non-zero value somewhere in the *whole*
# dataset: the mask is computed on `df`, not on the mineral subset.
has_nonzero = (df != 0).any(axis=0)
mineral_chemic.loc[:, has_nonzero].boxplot(figsize=(18, 6))
<AxesSubplot:>
# Rows whose group label is 'Metallic'.
metallic_df = df.loc[df["Group"] == 'Metallic']

# Positional column slice starting at 'H' and stopping just before 'Bi'.
columns_list = df.columns.tolist()
H_idx = columns_list.index('H')
Bi_idx = columns_list.index('Bi')
metallic_chemic = metallic_df.iloc[:, H_idx:Bi_idx]

# Column mask computed on the whole dataset (`df`), not on this subset.
has_nonzero = (df != 0).any(axis=0)
metallic_chemic.loc[:, has_nonzero].boxplot(figsize=(18, 6))
<AxesSubplot:>
# Rows whose group label is 'Tire wear'.
tire_df = df.loc[df["Group"] == 'Tire wear']

# Positional column slice starting at 'H' and stopping just before 'Bi'.
columns_list = df.columns.tolist()
H_idx = columns_list.index('H')
Bi_idx = columns_list.index('Bi')
tire_chemic = tire_df.iloc[:, H_idx:Bi_idx]

# Column mask computed on the whole dataset (`df`), not on this subset.
has_nonzero = (df != 0).any(axis=0)
tire_chemic.loc[:, has_nonzero].boxplot(figsize=(18, 6))
<AxesSubplot:>
# Rows whose group label is 'Biogenic_Organic'.
organic_df = df.loc[df["Group"] == 'Biogenic_Organic']

# Positional column slice starting at 'H' and stopping just before 'Bi'.
columns_list = df.columns.tolist()
H_idx = columns_list.index('H')
Bi_idx = columns_list.index('Bi')
organic_chemic = organic_df.iloc[:, H_idx:Bi_idx]

# Column mask computed on the whole dataset (`df`), not on this subset.
has_nonzero = (df != 0).any(axis=0)
organic_chemic.loc[:, has_nonzero].boxplot(figsize=(18, 6))
<AxesSubplot:>
# Parallel-coordinates view of the chemical features, coloured by group.

# Draw the same number of particles from every group (with replacement, so
# groups smaller than the sample size can still be drawn from).
grp = df[chemic_features + ['Group']]
grp = grp.groupby('Group', group_keys=False).apply(lambda x: x.sample(50, replace=True)).copy()

# The colour dimension must be numeric: encode the group as an integer code
# and keep the category order, so that the colour-bar labels are taken from
# the very same mapping instead of from the row order of the sample.
vis = grp.reset_index(drop=True)
group_categories = vis.Group.astype('category')
group_names = list(group_categories.cat.categories)
vis.Group = group_categories.cat.codes

# Discrete colour bands, one per group code; the scale and `range_color`
# are laid out for six groups (codes 0-5).
fig = px.parallel_coordinates(
    vis,
    color="Group",
    range_color=[0, 5],
    color_continuous_scale=[
        (0.00, "grey"), (0.16, "grey"),
        (0.16, "red"), (0.33, "red"),
        (0.33, "blue"), (0.49, "blue"),
        (0.49, "yellow"), (0.65, "yellow"),
        (0.65, "green"), (0.81, "green"),
        (0.81, "orange"), (1, "orange"),
    ],
)
fig.update_layout(coloraxis_colorbar=dict(
    title="Group",
    tickvals=list(range(len(group_names))),
    ticktext=group_names,
    lenmode="pixels", len=100,
))
Inspect distribution of morphological features in each group.
# Morphological descriptors inspected for every group.
morph_features = ["GrayMean", "Solidity", "Irregularity", "ShapeFactor", "LengthMicron", "Eccentricity", "GrayKurt", "FractalDimR", "AreaMicron2", "GrayStd", "Grain3"]

# Take an explicit copy: the columns below are overwritten, and assigning
# into a frame obtained by selection is a chained assignment in pandas.
organic_morph = organic_df[morph_features].copy()

# Log-transform these three columns before plotting them with the others.
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    organic_morph[col] = organic_morph[col].transform('log')

organic_morph.boxplot(figsize=(18, 6))
<AxesSubplot:>
# Take an explicit copy: the columns below are overwritten, and assigning
# into a frame obtained by selection is a chained assignment in pandas.
metallic_morph = metallic_df[morph_features].copy()

# Log-transform these three columns before plotting them with the others.
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    metallic_morph[col] = metallic_morph[col].transform('log')

metallic_morph.boxplot(figsize=(18, 6))
<AxesSubplot:>
# Take an explicit copy: the columns below are overwritten, and assigning
# into a frame obtained by selection is a chained assignment in pandas.
tire_morph = tire_df[morph_features].copy()

# Log-transform these three columns before plotting them with the others.
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    tire_morph[col] = tire_morph[col].transform('log')

tire_morph.boxplot(figsize=(18, 6))
<AxesSubplot:>
# Take an explicit copy: the columns below are overwritten, and assigning
# into a frame obtained by selection is a chained assignment in pandas.
mineral_morph = mineral_df[morph_features].copy()

# Log-transform these three columns before plotting them with the others.
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    mineral_morph[col] = mineral_morph[col].transform('log')

mineral_morph.boxplot(figsize=(18, 6))
<AxesSubplot:>
# Parallel-coordinates view of the morphological features, coloured by group.

# Explicit copy so the log transforms below are applied to an independent
# frame rather than to a selection of `df`.
grp = df[morph_features + ['Group']].copy()
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    grp[col] = grp[col].transform('log')

# Draw the same number of particles from every group (with replacement, so
# groups smaller than the sample size can still be drawn from).
grp = grp.groupby('Group', group_keys=False).apply(lambda x: x.sample(50, replace=True)).copy()

# The colour dimension must be numeric: encode the group as an integer code
# and keep the category order, so that the colour-bar labels are taken from
# the very same mapping instead of from the row order of the sample.
vis = grp.reset_index(drop=True)
group_categories = vis.Group.astype('category')
group_names = list(group_categories.cat.categories)
vis.Group = group_categories.cat.codes

# Discrete colour bands, one per group code; the scale and `range_color`
# are laid out for six groups (codes 0-5).
fig = px.parallel_coordinates(
    vis,
    color="Group",
    range_color=[0, 5],
    color_continuous_scale=[
        (0.00, "grey"), (0.16, "grey"),
        (0.16, "red"), (0.33, "red"),
        (0.33, "blue"), (0.49, "blue"),
        (0.49, "yellow"), (0.65, "yellow"),
        (0.65, "green"), (0.81, "green"),
        (0.81, "orange"), (1, "orange"),
    ],
)
fig.update_layout(coloraxis_colorbar=dict(
    title="Group",
    tickvals=list(range(len(group_names))),
    ticktext=group_names,
    lenmode="pixels", len=100,
))
import matplotlib.pyplot as plt

# Particle length per group, drawn on a logarithmic y axis.
lengt_df = df[["Group", "LengthMicron"]]

# Create the figure up front and hand its axes to pandas. Calling
# plt.yscale() first would implicitly create a default-sized figure that the
# boxplot then reuses, so the requested figsize would not take effect.
len_fig, len_ax = plt.subplots(figsize=(15, 8))
lengt_df.groupby('Group').boxplot(subplots=False, ax=len_ax)
len_ax.set_yscale("log")
len_ax.tick_params(axis="x", labelrotation=15)
# Number of particles recorded for each sample.
df["Sample"].value_counts()
ME22_MA_P1 1218 ME22_MA_P2 1023 ME22_TG_P2 982 ME22_TLY_P2 962 BO22_Car_p1_sec. analysis 892 CA22_UV_p1_sec. analysis 871 ME22_MA_P3 851 ME22_TG_P1 333 ME22_TLY_P1 288 Name: Sample, dtype: int64
# One bar chart of group counts per sample, laid out three per row.
stations = df.Sample.unique()
n_cols = 3
# Ceiling division: enough rows for every sample (3 rows for 9 samples), so
# that zip() below can never silently drop a sample.
n_rows = max(1, -(-len(stations) // n_cols))
_, axes = plt.subplots(n_rows, n_cols, figsize=(15, 8), constrained_layout=True)
axes = axes.ravel()
for station, ax in zip(stations, axes):
    df[df.Sample == station].Group.value_counts().plot(kind="bar", ax=ax, title=station)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=10)
# Hide any axes left over when the sample count is not a multiple of n_cols.
for spare_ax in axes[len(stations):]:
    spare_ax.set_visible(False)
# Share of each group within every sample, one pie per sample.
# size() counts rows per (Sample, Group); counting a particular column
# (e.g. Score) would skip rows where that column is missing.
vis = df.groupby(['Sample', 'Group']).size().reset_index(name='count')

# NOTE(review): the title below is spelled 'Poporcion' ('Proporcion' was
# probably intended); left unchanged because it is user-facing text.
figGeneral = px.pie(
    vis,
    values='count',
    names='Group',
    title='Poporcion de grupos por estacion',
    facet_col='Sample',
    facet_col_wrap=3,  # only plotly 5.9
)
figGeneral.update_layout(
    autosize=False,
    width=900,
    height=900)
# Check which plotly version is installed.
import plotly
plotly.__version__
'5.9.0'
# List the parameters accepted by px.pie in the installed plotly version.
import inspect
inspect.getfullargspec(px.pie)
FullArgSpec(args=['data_frame', 'names', 'values', 'color', 'facet_row', 'facet_col', 'facet_col_wrap', 'facet_row_spacing', 'facet_col_spacing', 'color_discrete_sequence', 'color_discrete_map', 'hover_name', 'hover_data', 'custom_data', 'category_orders', 'labels', 'title', 'template', 'width', 'height', 'opacity', 'hole'], varargs=None, varkw=None, defaults=(None, None, None, None, None, None, 0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'plotly.graph_objs._figure.Figure'>})
# Grouped bar chart: number of particles of each group within every sample.
px.histogram(
    data_frame=df,
    x="Sample",
    color="Group",
    barmode='group',
    title='Conteo de grupos por estacion',
)
Llanta aparece en mayor proporción en las 3 muestras de Museo de Antioquia, seguido de las estaciones de Bogotá y la muestra número 2 de Tanques La Y. Esto puede indicar la presencia de alto flujo vehicular en estos muestreos. Las partículas metálicas aparecen con distribución similar en los 3 sitios, con un pico en la segunda muestra de Tanques La Y, lo que indica alto flujo vehicular descendiendo la pendiente durante el tiempo de muestreo.
Los minerales aparecen en mayor proporción en las estaciones urbanas, lo que sugiere resuspensión de suelos, con excepción de Tanques G2, donde se presenta un pico que puede indicar un origen en actividades de construcción. Los orgánicos aparecen en proporciones similares en todas las estaciones, con un pico en Tanques Y2.
Las sales y las partículas Ti-rich aparecen solo en estaciones urbanas, en mayor medida en MA, donde se realizan muchas actividades de carpintería y talleres de autos.
Cluster particles by their features using t-SNE dimensionality reduction
# Feature table used for clustering and classification: the morphological
# descriptors followed by the chemical ones.
features = morph_features + chemic_features

# Explicit copy: columns are overwritten right below (and more columns are
# added to this table further down), which must not be done on a frame
# obtained by selecting from `df`.
cl_df = df[features].copy()

# Log-transform these three columns.
for col in ("AreaMicron2", "Irregularity", "LengthMicron"):
    cl_df[col] = cl_df[col].transform('log')

cl_df
| GrayMean | Solidity | Irregularity | ShapeFactor | LengthMicron | Eccentricity | GrayKurt | FractalDimR | AreaMicron2 | GrayStd | Grain3 | Si | C | O | P | Cl | Na | Ca | Al | Fe | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.214 | 0.782 | 1.779181 | 4.220 | 2.793065 | 0.975 | -1.009 | 1.042 | 3.587400 | 0.041 | 0.025 | 0.000 | 58.245 | 38.407 | 1.272 | 0.293 | 0.000 | 0.000 | 0.000 | 0.000 |
| 1 | 0.234 | 0.863 | 1.495373 | 2.724 | 2.570549 | 0.972 | -0.985 | 1.013 | 3.294503 | 0.052 | 0.010 | 0.000 | 58.868 | 36.102 | 1.123 | 0.505 | 0.354 | 0.000 | 0.000 | 0.000 |
| 2 | 0.201 | 0.956 | 0.273837 | 1.141 | 1.545006 | 0.541 | -0.133 | 1.012 | 2.637842 | 0.026 | 0.000 | 0.000 | 66.937 | 31.014 | 0.658 | 0.000 | 0.309 | 0.000 | 0.000 | 0.000 |
| 3 | 0.238 | 0.973 | 0.413433 | 1.087 | 1.344951 | 0.721 | 0.086 | 1.001 | 2.139125 | 0.042 | 0.000 | 0.000 | 56.770 | 32.504 | 0.956 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 4 | 0.227 | 0.973 | 0.093490 | 1.010 | 1.099612 | 0.267 | 2.696 | 1.001 | 1.936148 | 0.029 | 0.000 | 0.270 | 85.876 | 13.854 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7415 | 0.315 | 0.796 | 0.954357 | 2.153 | 2.265714 | 0.767 | 0.673 | 1.015 | 3.507118 | 0.092 | 0.016 | 10.747 | 34.808 | 33.543 | 0.000 | 0.000 | 0.486 | 1.759 | 5.756 | 10.721 |
| 7416 | 0.300 | 0.917 | 0.509825 | 1.274 | 1.640549 | 0.703 | -0.146 | 1.008 | 2.727918 | 0.081 | 0.023 | 5.470 | 44.663 | 32.910 | 0.221 | 0.000 | 0.298 | 2.212 | 2.763 | 8.667 |
| 7417 | 0.203 | 0.664 | 1.010873 | 5.696 | 2.006199 | 0.524 | 9.749 | 1.066 | 3.123202 | 0.064 | 0.012 | 9.509 | 36.093 | 31.405 | 0.000 | 0.000 | 0.519 | 0.783 | 4.940 | 11.889 |
| 7418 | 0.312 | 0.829 | 0.692647 | 1.789 | 1.420937 | 0.395 | 3.081 | 1.035 | 2.130966 | 0.084 | 0.032 | 4.181 | 32.476 | 35.681 | 0.000 | 0.000 | 0.508 | 12.860 | 1.610 | 10.304 |
| 7419 | 0.285 | 0.895 | 1.121678 | 1.648 | 1.565277 | 0.924 | 0.298 | 1.011 | 1.932535 | 0.065 | 0.026 | 6.255 | 51.504 | 26.648 | 0.000 | 0.000 | 0.461 | 0.500 | 3.502 | 8.516 |
7420 rows × 20 columns
# t-SNE implementation from scikit-learn
from sklearn.manifold import TSNE

# Hyperparameters: number of embedding dimensions and perplexity
keep_dims = 2
prp = 40

# Embed a copy of the feature table
tsnedf = cl_df.copy()

# Configure the model (fixed seed, all available cores)
tsne_options = dict(
    n_components=keep_dims,
    perplexity=prp,
    random_state=42,
    n_iter=5000,
    n_jobs=-1,
)
tsne = TSNE(**tsne_options)

# Fit the model and project the data onto the embedding
X_dimensions = tsne.fit_transform(tsnedf)

# Check the shape of the result
X_dimensions.shape
(7420, 2)
# Display the embedded coordinates returned by t-SNE.
X_dimensions
array([[-106.354195, -18.820627],
[-103.93233 , -10.525301],
[ -94.231544, 32.022583],
...,
[ -17.325533, 5.60417 ],
[ -18.602459, -26.244287],
[ -49.345295, 10.416051]], dtype=float32)
# Wrap the embedding in a DataFrame and attach each particle's group label
# (the label column is matched to the embedding rows by index).
embedding_columns = ["Dimension 1", "Dimension 2"]
tsnedf_res = pd.DataFrame(X_dimensions, columns=embedding_columns)
tsnedf_res["Group"] = df["Group"].copy()
tsnedf_res
| Dimension 1 | Dimension 2 | Group | |
|---|---|---|---|
| 0 | -106.354195 | -18.820627 | Biogenic_Organic |
| 1 | -103.932327 | -10.525301 | Biogenic_Organic |
| 2 | -94.231544 | 32.022583 | Biogenic_Organic |
| 3 | -78.440651 | -7.695202 | Biogenic_Organic |
| 4 | -17.732616 | 87.539787 | Biogenic_Organic |
| ... | ... | ... | ... |
| 7415 | -12.658135 | -7.580052 | Tire wear |
| 7416 | -37.999176 | -4.604805 | Tire wear |
| 7417 | -17.325533 | 5.604170 | Tire wear |
| 7418 | -18.602459 | -26.244287 | Tire wear |
| 7419 | -49.345295 | 10.416051 | Tire wear |
7420 rows × 3 columns
import seaborn as sns

# Scatter of the two embedding dimensions with marginal distributions,
# coloured by group.
g = sns.jointplot(
    data=tsnedf_res,
    x="Dimension 1",
    y="Dimension 2",
    hue="Group",
)
# Attach the group label and the score to the feature table.
cl_df["Group"] = df["Group"].copy()
cl_df['Score'] = df["Score"].copy()

import statsmodels.api as sm
import numpy as np

# Design table: every column except the last two (the ones added above),
# followed by one indicator column per group.
feature_part = cl_df.iloc[:, :-2]
group_dummies = pd.get_dummies(cl_df.Group)
data = feature_part.join(group_dummies)
Biogenic_Organic
# Logistic regression for 'Metallic' versus every other group.

# Select the predictors by name instead of dropping a hard-coded number of
# indicator columns from the end, so the model does not depend on how many
# groups the data happens to contain.
X = data[features]

# Cast the indicator to float before fitting.
# NOTE(review): get_dummies may return unsigned 8-bit integers; arithmetic
# on such a response can wrap around inside the likelihood, which would
# explain a reported "Current function value: inf" - confirm on re-run.
y = data['Metallic'].astype(float)

# Intercept appended as the last column.
X = sm.add_constant(X, prepend=False)
lg = sm.Logit(y, X)
result = lg.fit()
print(result.summary())
Optimization terminated successfully.
Current function value: inf
Iterations 13
Logit Regression Results
==============================================================================
Dep. Variable: Metallic No. Observations: 7420
Model: Logit Df Residuals: 7399
Method: MLE Df Model: 20
Date: Wed, 02 Nov 2022 Pseudo R-squ.: inf
Time: 20:57:49 Log-Likelihood: -inf
converged: True LL-Null: 0.0000
Covariance Type: nonrobust LLR p-value: 1.000
================================================================================
coef std err z P>|z| [0.025 0.975]
--------------------------------------------------------------------------------
GrayMean 15.3646 4.494 3.419 0.001 6.557 24.172
Solidity -6.3940 6.153 -1.039 0.299 -18.454 5.666
Irregularity -1.6327 2.147 -0.760 0.447 -5.842 2.576
ShapeFactor 0.1312 0.199 0.660 0.510 -0.259 0.521
LengthMicron -1.5961 3.745 -0.426 0.670 -8.936 5.744
Eccentricity 1.8351 1.797 1.021 0.307 -1.687 5.357
GrayKurt 0.0134 0.115 0.116 0.908 -0.213 0.239
FractalDimR 14.9378 9.844 1.517 0.129 -4.357 34.233
AreaMicron2 0.4625 1.906 0.243 0.808 -3.274 4.199
GrayStd 25.5181 6.130 4.163 0.000 13.503 37.533
Grain3 1.5785 6.751 0.234 0.815 -11.653 14.810
Si -0.2225 0.041 -5.415 0.000 -0.303 -0.142
C -0.1194 0.020 -5.842 0.000 -0.159 -0.079
O -0.4178 0.053 -7.878 0.000 -0.522 -0.314
P -0.3466 0.132 -2.621 0.009 -0.606 -0.087
Cl -0.1973 0.265 -0.746 0.456 -0.716 0.321
Na -0.2062 0.270 -0.765 0.444 -0.735 0.322
Ca -0.1192 0.033 -3.578 0.000 -0.185 -0.054
Al -0.1881 0.056 -3.349 0.001 -0.298 -0.078
Fe 0.2789 0.031 8.984 0.000 0.218 0.340
const -5.5269 14.164 -0.390 0.696 -33.288 22.234
================================================================================
Possibly complete quasi-separation: A fraction 0.75 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.
/home/lecun/.local/lib/python3.8/site-packages/statsmodels/base/model.py:547: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available /home/lecun/.local/lib/python3.8/site-packages/statsmodels/base/model.py:547: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available
from sklearn.model_selection import train_test_split

# Hold out 30 % of the rows for testing; fixed seed for a repeatable split.
# Predictors are every column of cl_df except the last two.
feature_matrix = cl_df.iloc[:, :-2]
group_labels = cl_df.Group
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    group_labels,
    test_size=0.30,
    random_state=42,
)
print(len(X_train), len(X_test))
5194 2226
from sklearn.linear_model import LogisticRegression

# Unpenalised multinomial logistic regression on the training split.
sklg = LogisticRegression(
    penalty='none',
    solver='lbfgs',
    multi_class='multinomial',
)
sklg.fit(X_train, y_train)

# Predicted group for every held-out row.
pred = sklg.predict(X_test)
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, confusion_matrix

# Per-class precision, recall and F1 on the held-out rows.
report_text = classification_report(y_test, pred)
print(report_text)
precision recall f1-score support
Biogenic_Organic 0.95 0.97 0.96 719
Metallic 0.97 0.91 0.94 136
Mineral 0.95 0.97 0.96 1109
Salt 0.50 0.25 0.33 4
Ti-rich Paint 0.00 0.00 0.00 19
Tire wear 0.85 0.78 0.82 239
accuracy 0.94 2226
macro avg 0.70 0.65 0.67 2226
weighted avg 0.93 0.94 0.93 2226
# Raw confusion counts for the held-out rows.
confusion_matrix(y_true=y_test, y_pred=pred)
array([[ 697, 0, 7, 0, 2, 13],
[ 2, 124, 9, 0, 1, 0],
[ 7, 3, 1080, 1, 0, 18],
[ 1, 0, 2, 1, 0, 0],
[ 18, 0, 0, 0, 0, 1],
[ 12, 1, 39, 0, 0, 187]])
# Same confusion matrix rendered as a labelled heat map.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=pred, cmap='Blues')
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7f4719f64550>
# Run this command in the appropriate environment (IPython shell escape).
# It invokes classification/train.py with --test-only for a resnet18 model,
# resuming from the saved checkpoint model_6.pth.
!torchrun classification/train.py --test-only --model=resnet18 --data-path=/media/lecun/HD/Expor2/ParticlesDB/folders --resume=./classification/models/model_6.pth
Loading data
Loading training data
Took 0.011496543884277344
Loading validation data
Creating data loaders
/media/lecun/HD/Expor2/Particle-classifier/classification/env/lib/python3.8/site-packages/torch/utils/data/dataloader.py:563: UserWarning: This DataLoader will create 16 worker processes in total. Our suggested max number of worker in current system is 8, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
warnings.warn(_create_warning_msg(
Creating model
Test: [ 0/59] eta: 0:01:47 loss: 0.1661 (0.1661) acc1: 96.8750 (96.8750) acc5: 100.0000 (100.0000) time: 1.8303 data: 1.0699 max mem: 257
Test: Total time: 0:00:03
Test: Acc@1 84.797 Acc@5 99.679
[[598 2 111 2]
[ 2 94 17 1]
[ 9 17 753 51]
[ 7 0 65 139]]
precision recall f1-score support
Biogenic_Organic 0.97 0.84 0.90 713
Metallic 0.83 0.82 0.83 114
Mineral 0.80 0.91 0.85 830
Tire wear 0.72 0.66 0.69 211
accuracy 0.85 1868
macro avg 0.83 0.81 0.82 1868
weighted avg 0.86 0.85 0.85 1868
# Confusion matrix copied by hand from the evaluation log of the image
# classifier. Each row sums to the support reported for that class, so rows
# are the true classes and columns the predicted ones.
cm = [
    [598, 2, 111, 2],
    [2, 94, 17, 1],
    [9, 17, 753, 51],
    [7, 0, 65, 139],
]

# Class names in the order used by the evaluation report; without them the
# axes would only show the positions 0-3.
cm_labels = ["Biogenic_Organic", "Metallic", "Mineral", "Tire wear"]

disp = ConfusionMatrixDisplay(confusion_matrix=np.array(cm), display_labels=cm_labels)
disp.plot(cmap='Blues')
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7f46b42d3c70>